In [8]:
# imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import seaborn as sns
from IPython.display import (
%matplotlib inline
For many variables we will use vectorized implementation $$X=\left[\begin{array}{cc} 1 & (\vec x^{(1)})^T \\ 1 & (\vec x^{(2)})^T \\ \vdots & \vdots\\ 1 & (\vec x^{(m)})^T \\ \end{array}\right] = \left[\begin{array}{cccc} 1 & x_1^{(1)} & \cdots & x_n^{(1)} \\ 1 & x_1^{(2)} & \cdots & x_n^{(2)} \\ \vdots & \vdots & \ddots & \vdots\\ 1 & x_1^{(m)} & \cdots & x_n^{(m)} \\ \end{array}\right] $$
$$\vec{y} = \left[\begin{array}{c} y^{(1)}\\ y^{(2)}\\ \vdots\\ y^{(m)}\\ \end{array}\right] \quad \theta = \left[\begin{array}{c} \theta_0\\ \theta_1\\ \vdots\\ \theta_n\\ \end{array}\right]$$
The vectorized implementation is much faster than the one from the previous lecture.
In [14]:
# Load the training set: the file has no header row, so name the two
# columns explicitly (x = feature, y = target).
df = pd.read_csv("ex1data1.txt", header=None)
df.columns = ['x', 'y']

# Number of training examples.
m = len(df)

# Design matrix: a leading column of ones (the multiplier of theta_0)
# followed by the feature column. Built in a single step so that re-running
# the cell never stacks a second column of ones onto an existing X.
X = np.matrix(np.column_stack((np.ones(m), df.x.values)))

# Targets as an (m, 1) column vector.
y = np.matrix(df.y.values[:, np.newaxis])

# Starting parameters [theta_0, theta_1] as a (2, 1) column vector.
theta = np.matrix([-5, 1.3]).reshape(2, 1)
In [15]:
# Preview the first ten rows of X and y and the starting parameters.
# Each line is formatted into a single string first, so the output is the
# same text under the Python 2 print statement and the Python 3 print function.
print('X %s' % (X[:10],))
print('y %s' % (y[:10],))
print('theta %s' % (theta,))
In [3]:
def JMx(theta, X, y):
    """Return the least-squares cost J(theta) = 1/(2m) * ||X*theta - y||^2.

    Parameters
    ----------
    theta : array-like
        Parameter vector; any shape that flattens to one value per column of X.
    X : array-like, shape (m, n + 1)
        Design matrix (np.matrix or ndarray).
    y : array-like
        Targets; any shape that flattens to m values.

    Returns
    -------
    float
        The scalar cost.
    """
    # Convert to plain float ndarrays so the function behaves the same for
    # np.matrix and ndarray inputs (``*`` is element-wise on ndarrays).
    design = np.asarray(X, dtype=float)
    params = np.asarray(theta, dtype=float).reshape(-1, 1)
    targets = np.asarray(y, dtype=float).reshape(-1, 1)

    m = len(targets)

    # Compute the residual once and reuse it for the squared norm.
    residual = design.dot(params) - targets
    J = 1.0 / (2.0 * m) * residual.T.dot(residual)
    return J.item()
In [16]:
# Evaluate the cost at the starting parameters and render it as LaTeX.
error = JMx(theta, X, y)
cost_latex = r'\Large J(\theta) = %.4f' % error
display(Math(cost_latex))
In [ ]:
We can compute $\hat\theta$ directly, without iterating, using the normal equation: $$\hat\theta = (X^TX)^{-1}X^T \vec y$$
In [ ]:
| Gradient method | Normal equation |
|---|---|
| need to choose $\alpha$ | no need to choose $\alpha$ |
| needs many iterations | no iterations |
| works well for a large number of features (x) | slow for a large number of features (x) |
| | needs to compute the inverse matrix $(X^TX)^{-1}$ |
In [17]:
from sklearn.linear_model import (
In [ ]: